**********************************************************************
*Hospital year level regs 
*I. PREPARE REGRESSION FILE 
**********************************************************************
	*File options 
	*Translate the global option code into the sample suffix held in local is.
	*Codes 1, 3, 5 select the is sample and codes 2, 4, 6 select the ss sample.
	*Every variable and file name below is built from this suffix, so any other
	*code stops the run here instead of failing later with an unrelated error.
	if inlist($option ,1,3,5) {
	local is is
	}
	else if inlist($option ,2,4,6) {
	local is ss
	}
	else {
	display as error "global option must be an integer from 1 to 6"
	exit 198
	}
	
	*Release any dataset still held by an earlier preserve; capture suppresses
	*the error raised when nothing is preserved.
	capture restore 
	*Assumes the construction of a hospital-year level episode file, which contains 
	*spending summary variables and has been merged to American Hospital Association 
	*(AHA) IDs and mergers. These processing steps are described in Appendix A1, A3. 
	use "$reg\hospreg_FULL0_all1", clear
	
	*CONTROL VARS
	*hospcontrol = hospital characteristics (hc2) + area characteristics + hhi.
	*ptntcontrol = patient characteristics (pc) + plan characteristics.
	*Only hospcontrol and ptntcontrol are referenced in the sections below; the
	*remaining locals (control, treat, WT, fe, print) are presumably consumed by
	*regression code outside this section - TODO confirm.
	{
	local hc2 bdtot teach mcare mcaid type_gac
	local area rural white college unempl poverty elderly state_exp_status 	
	local pc female age utilm90 
	local plan fullyinsured
	local hospcontrol `hc2' `area' hhi
	local ptntcontrol `pc' `plan' 
	local control never 
	local treat (incl_1`is')
	local WT "[aw=wt_cemm]"
	local fe i.year	
	local print keep(bought_1`is')	
	}
	
	*DEFINE TREATED, CONTROL GROUPS 
	*Trims the file to the analysis variables, removes hospital-years at or after
	*an absorption event, and builds the control flag (never) and the event-window
	*sample flag (t3samp + suffix) for the current run.
	{		
	*Short names such as ye, vol and ofint are used throughout; they presumably
	*resolve through Stata variable abbreviation (ye to year) - TODO confirm.
	keep *num ye cost17 np fp `hospcontrol' `ptntcontrol' ofinterest hrrcode stcd tot_drgwt *vol* wts never prev absorb* bought bought_*s bought_1*s incl_1*s pre?_1?s* post?_1?s* d_* flag_real*

	*Drop hospitals that are absorbed/absorb another; with insufficient observations 
	*For each flag, store per hospital the earliest year in which the flag is 1;
	*rows from that year onward are dropped below.
	foreach var in absorbing_flg absorbed_flg {
	gen t = year if `var'==1
	bysort num: gegen min`var' = min(t)
	drop t
	}
	drop if ye >= minabsorbing & !missing(minabsorbing)
	drop if ye >= minabsorbed & !missing(minabsorbed)
	drop *absorb* 
	*NOTE(review): vol is an abbreviated name; confirm it resolves to a single
	*variable among those kept by the *vol* pattern above.
	drop if vol < 15
	
	*Define SS control group: previously bought, with no deals in 10yrs 
	*aha_othercontols contains flags for hospitals that are system-owned and have 
	*experienced no deal activity in the last 10 years (prev_10ya_nb), and for 
	*those that will be acquired in the next three years (next_3y).
	merge m:1 num ye using "$aha\tarball\aha_othercontols", keepusing(prev_10ya_nb next_3y)
	drop if _m==2
	drop _m
	*Hospital-years without a match in the flag file are treated as unflagged.
	foreach var in next_3y prev_10ya_nb {
	qui replace `var' = 0 if missing(`var')
	}
	*In the ss run the original never flag is replaced by prev_10ya_nb; the
	*tabulation and distinct counts compare the old and new definitions.
	if "`is'"=="ss" {
	gen     new_never = 0 
	replace new_never = 1 if prev_10ya_nb 
	tab never new_never   if ofint
	gdistinct num         if ofint & never
	gdistinct num         if ofint & new_never 
	drop never
	ren new_never never
	}
	
	*Define treated soon control group (for robustness): bought during 18-21, and not during study period (13-17) 
	*never_next_3yr is only built in the is run here.
	if "`is'"=="is" {
	tab never next_3y, m 
	tab incl_1`is' next_3y, m
	gen never_next_3yr = never & next_3y
	}
	*Event-window sample flag: 1 for rows with any pre3-pre0 or post1-post3 dummy
	*equal to 1, or for control (never) rows.
	*NOTE(review): the post3 term carries an extra trailing 2 in its variable name,
	*unlike the other window dummies - confirm this is the intended variable.
	gen t3samp`is' = (pre3_1`is'==1 | pre2_1`is'==1 | pre1_1`is'==1 | pre0_1`is'==1 | post1_1`is' ==1 | post2_1`is' ==1 | post3_1`is'2==1 | never==1)
	}
	
	*MERGE IN VARS
	*Adds matching weights, AHA and HCRIS variables, lagged market concentration
	*and (is run only) cardiac volume. After each merge the using-only rows are
	*dropped so the hospital-year sample is not expanded.
	{
	*Matching weights
	merge 1:1 num ye using "$reg\fulldata_wts`is'"
	drop if _m==2
	drop _m
	*Hospital-years absent from the weights file get weight zero.
	replace wt_cemm = 0 if missing(wt_cemm)

	*AHA + HCRIS variables
	*These three files contain variables from the AHA survey.
	merge 1:1 num ye using "$aha\tarball\aha_services_clean2"
	drop if _m==2
	drop _m
	merge 1:1 num ye using "$aha\aha_fteexp_8.22.dta"
	drop if _m==2
	drop _m	
	merge 1:1 num ye using "$aha\aha_adjadm_clean"
	drop if _m==2
	drop _m
	
	*HCRIS is keyed on old_num rather than num.
	merge m:1 old_num ye using "$hcris/hcris_cleaned.dta", keepusing(totcontractfte5 totcontractohfte5 totohfte5)
	drop if _m==2
	drop _m	
	*Total contract FTEs = the two contract FTE fields summed. max(x,0) floors each
	*component at zero and, because max() ignores missing arguments, also turns a
	*missing component into zero.
	replace totcontractfte5 = max(totcontractfte5,0) + max(totcontractohfte5,0)
	
	*Derived expense and staffing measures:
	*paytot2 = payroll + benefits; deprinter = depreciation + interest;
	*othexp = total expense minus those four components.
	gen paytot2       = paytot + npayben
	gen deprinter     = dpexa  + intexa
	gen othexp        = exptot - paytot - npayben - dpexa - intexa  
	gen ftenurseskill = ftern + ftelpn
	gen pay_fte       = paytot2 / fte
	gen admtot_bed    = admtot / bdtot 
	gen adj_bed       = adj / bdtot 
	
	*Lagged HHI (HRR level) 
	*hhi_hrr_0.dta contains market concentration (HHI, calculated from bed shares
	*per Hospital Referral Region using AHA data) for each hospital-year.
	capture drop hhi 
	merge 1:1 num ye using "$aha\hhi_hrr_0", keepusing(hhi p_dhhi dhhi hhi_hsa)
	*Using-only rows are kept until the lags are built (dropped further below),
	*presumably so that years outside the sample can supply lagged values.
	xtset num ye	
	*Each measure is replaced by its one-year lag; the same-year value is kept
	*temporarily under the name with a 2 suffix.
	foreach var in hhi p_dhhi dhhi hhi_hsa {
	ren `var' `var'2
	gen `var' = L1.`var'2
	}
	*Fill remaining gaps in the two level measures: each pass first tries the
	*i-th lag of the running series, then falls back to the same-year value.
	foreach var in hhi hhi_hsa {
	forval i = 1/4 {
	capture noisily replace `var' = L`i'.`var' if missing(`var')
	capture noisily replace `var' = `var'2     if missing(`var')
	}	
	}
	drop *hhi*2
	drop if _m==2
	drop _m
	*Stop if any hospital-year in a state of interest still lacks hhi.
	count if missing(hhi) & ofint 
	assert r(N)==0
	
	*Elevance cardiac volume
	*is run only: count the rows per hospital-year in the cardio sample file and
	*merge that count back in, then scale by total beds.
	if "`is'"=="is" {
	preserve 
	use "$reg\cardiosample`is'", clear
	keep num ye 
	bysort num ye: gegen cardio_vol = count(ye)
	gduplicates drop 
	compress
	tempfile cardio_vol
	save `cardio_vol', replace
	restore
	
	merge 1:1 num ye using `cardio_vol'
	drop if _m==2
	drop _m 
	capture gen cardio_vol_bed = cardio_vol / bdtot 
	}
	}
	
	*NORMALIZE VARS
	*Builds baseline (first sample year) hospital size measures, expresses cost
	*and staffing variables per baseline bed, winsorizes the dollar measures,
	*rescales dollars and takes logs.
	{
	*Per bed 
	*Baseline year per hospital = earliest year, ignoring treated rows that fall
	*outside the event window.
	gen t = ye 
	replace t = . if incl_1`is' & !t3samp`is'
	bysort num: gegen minye = min(t)
	drop t 
	*Copy the baseline-year value of each size measure to every row of the hospital.
	foreach var in bdtot admtot fte ipdtot adj {
	gen `var'_first2 = `var' if ye==minye
	bysort num: gegen `var'_first = max(`var'_first2)
	drop `var'_first2 
	}
	
	*Winsorize costs 
	*Every listed variable is divided by baseline beds (prefix pbf). Only the four
	*dollar measures are winsorized at the 1st and 99th percentiles. The names in
	*inlist() must match the loop values exactly: a stray leading blank in the
	*othexp entry previously left pbfothexp unwinsorized.
	foreach var in exptot paytot2 othexp deprinter fte fteoth94 totohfte5 totcontractfte5 {
	capture noisily gen pbf`var'   =`var' / bdtot_first
	
	qui sum pbf`var', d
	if inlist("`var'","exptot","paytot2","othexp","deprinter") {
	replace pbf`var' = r(p99) if pbf`var' > r(p99) & !missing(pbf`var')
	replace pbf`var' = r(p1)  if pbf`var' < r(p1)  & !missing(pbf`var')	
	}
	}
	
	*Inflate (2017 to 2018 dollars)
	*All dollar-denominated measures are divided by the same factor.
	foreach var in cost17 pbfexptot pbfpaytot2 pbfothexp pbfdeprinter {
	replace `var' = `var' / 0.9762
	}
	*cost is an abbreviated variable name; presumably it resolves to cost17.
	gen ln_cost = ln(cost)
	gen ln_pbfexptot = ln(pbfexptot)
	}
	
	*LIMIT TO TREATED, CONTROL GROUPS
	*Restricts the file to treated rows inside the event window plus control rows,
	*then applies the panel-length, geography and deal-type exclusions.
	{
	keep if (incl_1`is' & t3samp`is') | never 
	
	*Keep if has 2+ years of data
	bysort num: gegen ct = count(ye)
	drop if ct==1
	drop ct
	*A hospital whose bought indicator never changes across its remaining rows
	*(standard deviation zero) is removed from the treated group; the sample
	*filter is then re-applied so those rows survive only if they are controls.
	bysort num: gegen sd = sd(bought_1`is')
	replace incl_1`is' = 0 if sd==0 
	drop sd 
	keep if (incl_1`is' & t3samp`is') | never 
	
	*Keep if is in an Elevance state of interest
	keep if ofinterest==1
	
	*Drop mergers that create new systems
	drop if flag_real_`is'c==1
	}
	
	*COUNT HOSPITALS, DEALS
	*Attaches deal codes and reports how many deals, targets and control
	*hospitals are in the sample, with and without positive matching weight.
	{
	*Merge to deals data 
	merge 1:1 num ye using "$reg\reg_dealcodes`is'match0"	
	*The hospital-level deal code is taken before using-only rows are dropped, so
	*deal rows for years outside the sample still contribute to the maximum.
	bysort num: gegen has_deal_`is'  = max(deal_tar_`is')
	drop if _m==2
	drop _m
	*The first count is informational only; the assert checks the result of the
	*second count, i.e. treated rows with positive weight and no deal code.
	*NOTE(review): confirm that unweighted treated rows may lack a deal code here.
	count if incl_1`is'==1 & missing(has_deal_`is') 
	count if incl_1`is'==1 & missing(has_deal_`is') & wt_cemm > 0 	
	assert r(N)==0
	
	*Deals (unmatched, matched)
	gdistinct has_deal_`is'  if incl_1`is'==1 
	gdistinct has_deal_`is'  if incl_1`is'==1 & wt_cemm > 0 
	*Targets (unmatched, matched)
	gdistinct num            if incl_1`is'==1 
	gdistinct num            if incl_1`is'==1 & wt_cemm > 0 
	*Control hospitals (unmatched, matched)
	gdistinct num if never      
	gdistinct num if never      & wt_cemm  > 0
	}	
	
	*ROBUSTNESS & EXPERIENCE VARS 
	*Builds the clustering id, patient-experience z-scores, balanced-panel flags
	*and the nearby-control flag used in robustness checks.
	{	
	*Cluster by deal 
	*Treated rows cluster on their deal code (the assert guarantees every treated
	*row has one); control rows, and any other row without a deal code, cluster
	*on the hospital id.
	bysort num incl_1`is': gegen deal_cluster = max(has_deal_`is')		
	replace deal_cluster = . if incl_1`is'!=1
	count if incl_1`is'==1 & missing(deal_cluster)
	assert r(N)==0
	replace deal_cluster = num if never==1 | missing(deal_cluster)
	
	*HCAHPS quality 
	*The hospital id is renamed to ccn for the merge and afterwards renamed to
	*num_prvdr_num rather than back to num.
	*NOTE(review): later references to num presumably resolve to num_prvdr_num
	*through variable abbreviation - confirm.
	ren num ccn 
	merge 1:1 ccn ye using "$sdb\tarball\quality_11_18"
	drop if _m==2
	drop _m
	ren ccn num_prvdr_num
	
	*Generate composite score
	*Each measure is standardized with the mean and sd of all rows currently in
	*memory; z_avg is the row mean of every variable whose name starts with z_.
	local hc communic_nurse communic_dr quick rating recommend
	foreach var in `hc' {
	qui sum `var', d
	gen z_`var' = (`var'-r(mean))/r(sd)
	}
	gegen z_avg = rowmean(z_*)
	
	*"Balanced" panel (robustness) 
	*Treated must have years {-2,-1,0,1,2}; nevers must have 5+ years 
	*bal_22_only marks the rows inside the -2 to +2 window; bal_22 counts how many
	*of the five event years the hospital is observed in.
	gen bal_22_only= 0 
	foreach var in pre2_1`is' pre1_1`is' pre0_1`is' post1_1`is' post2_1`is' {
	bysort num: gegen _has_`var' = max(`var')
	replace bal_22_only = 1 if `var'==1
	}
	gegen bal_22 = rowtotal(_has_*)	
	drop _has_*
	bysort num: gegen ct = count(ye)
	
	tab bal_22 if incl_1`is'
	*has_bal_22: treated hospitals with all five event years, or controls with at
	*least five years. bal_22_only is then switched off for hospitals that fail
	*this test and switched on for every row of a qualifying control.
	gen     has_bal_22  = bal_22==5  & incl_1`is'
	replace has_bal_22  = 1 if ct>=5 & never
	replace bal_22_only = 1 if ct>=5 & never
	replace bal_22_only = 0 if !has_bal_22
	*The bal_?? pattern matches only six-character names, so bal_22 is dropped
	*while bal_22_only and has_bal_22 are kept.
	drop bal_?? ct
	
	*Report hospital counts in the balanced sample (all, treated, and the same
	*two counts restricted to positive matching weight).
	foreach i in 22 {
	di "total, treated" 
	gdistinct num if has_bal_`i' 
	gdistinct num if has_bal_`i' & incl_1`is'
	di "match wts"
	gdistinct num if has_bal_`i' & wt_cemm > 0 
	gdistinct num if has_bal_`i' & wt_cemm > 0 & incl_1`is'
	}
	
	*Controls within 5mi of treateds (robustness)
	*never_within_all_`is' contains flags for control hospitals within 5mi of a 
	*treated hospital.
	sort num ye
	merge 1:1 num ye using "$reg\never_within_all_`is'", sorted
	drop if _m==2
	drop _m
	*within2 carries the flag to every year of the hospital; within is an
	*abbreviated name that presumably resolves to within5, which is then dropped.
	bysort num: gegen within2 = max(within)
	drop within5
	}
	
	*HETEROGENEITY VARS 
	*Builds deal-level characteristics for treated hospitals (acquirer and target
	*size, target-acquirer distance, same-HRR flag, predicted HHI change, target
	*beds, pre-merger price, acquirer ownership), fixes them at the hospital
	*level, flags above-median values and sets all of them to 0 for controls.
	*All sample-specific names use the run suffix held in local is, including the
	*checks and diagnostics that previously hard-coded the is suffix.
	{
	*Target, acquirer size (GAC hospital count in AHA)
	capture drop prea_ct_`is' pre_ct_tar_`is' 
	merge 1:1 num ye using "$aha\num_dealcodes", keepusing(deal_?? acq) 
	*Keep the first merge indicator under another name so both merges can be
	*cleaned up together after the imputation below.
	ren _m __m
	merge 1:1 num ye using "$reg\reg_dealcodes`is'match0", keepusing(pre_ct_tar_`is')
	*Acquirer size = sum of the acq variable over all rows sharing a deal code.
	bysort deal_`is': gegen acq_ct = sum(acq)
	
	*Impute 
	*Values are blanked where no deal code exists, then filled from an earlier
	*year for a fixed list of hospital-years, and finally blanked for rows that
	*are not treated. Using-only rows are still present so lags can reach them.
	xtset num ye 
	foreach var in acq_ct pre_ct_tar_`is' {
	replace `var' = . if missing(deal_`is')
	replace `var' = L1.`var' if missing(`var') & "`is'"=="is" & num==330096 & ye==2016	
	replace `var' = L1.`var' if missing(`var') & "`is'"=="ss" & num==140063 & ye==2014	
	replace `var' = L2.`var' if missing(`var') & "`is'"=="ss" & num==340020 & ye==2018
	replace `var' = L2.`var' if missing(`var') & "`is'"=="ss" & num==310112 & ye==2018
	replace `var' = L1.`var' if missing(`var') & "`is'"=="ss" & num==340075 & ye==2014	
	replace `var' = L1.`var' if missing(`var') & "`is'"=="ss" & num==450032 & ye==2018	
	replace `var' = . if incl_1`is'!=1
	}
	drop if _m==2 | __m==2
	drop _m __m acq deal_??
	
	*Target-acquirer distance
	*acq_dist contains the minimum distance between each target and its acquirers 
	*in miles.
	sort num ye
	merge m:1 num ye using "$aha\acq_dist", keepusing(acqdist_min)		
	*Take the maximum over the treated rows of the hospital first; if that is
	*missing fall back to the maximum over all rows of the hospital (using-only
	*rows included, since they are dropped only afterwards).
	foreach var in min {
	bysort num incl_1`is': gegen a_`var'dist = max(acqdist_`var')	
	bysort num:            gegen t           = max(acqdist_`var')	
	replace a_`var'dist = t if missing(a_`var'dist)
	replace a_`var'dist = . if incl_1`is'!=1
	drop acqdist_`var' t
	}
	drop if _m==2
	drop _m 
	*Stop if any treated row lacks a distance.
	gdistinct num if incl_1`is'==1 & missing(a_min)
	assert r(N)==0 
	
	*In_[geography]
	*taracq contains flags for targets in the same {state, HRR, HSA} as at least 
	*one of their acquirers.
	merge m:1 num ye using "$aha\taracq", keepusing(in_*)
	drop if _m==2
	drop _m	
	ren in_hrr inhrr
	*Row-level flag: 0 before acquisition, missing for non-treated rows.
	*Hospital-level flag (prefix has): 1 if any treated row of the hospital is 1.
	foreach var in inhrr {
	bysort num bought_1`is' incl_1`is': gegen `var'_`is' = max(`var')
	replace `var'_`is' = 0 if !bought_1`is'
	replace `var'_`is' = . if !incl_1`is' 
	bysort num incl_1`is': gegen has`var'_`is' = max(`var'_`is')
	replace has`var'_`is' = . if !incl_1`is'
	}
	*Stop if any treated row of the current run lacks the hospital-level flag.
	count if incl_1`is'==1 & missing(hasinhrr_`is')
	assert r(N)==0 
	
	*Predicted change in HHI 
	*The predicted change is set to 0 when the row-level same-HRR flag is 0. The
	*value in event year 0 is then spread to all rows of the hospital; hospitals
	*without such a value get 0.
	replace p_dhhi = 0 if !missing(p_dhhi) & inhrr_`is'==0 	
	gen temp2 = p_dhhi if pre0_1`is'==1
	bysort num: gegen temp = max(temp2)
	sum p_dhhi if missing(temp) & incl_1`is'
	replace temp = 0 if missing(temp)
	
	*Indicators for any positive predicted change and for a change above 0.02.
	*In the is run a same-HRR deal always counts as a positive change.
	gen pos_pdhhi     = (temp > 0.0000001)
	gen pos_pdhhi200  = (temp > 0.02)
	replace pos_pdhhi = 1 if hasinhrr_`is'==1 & "`is'"=="is"		
	tab pos_pdhhi    if incl_1`is'==1, m
	tab pos_pdhhi200 if incl_1`is'==1, m
	drop temp* 
	
	*Target size (beds)
	gen tar_bed_`is' = bdtot_first if incl_1`is' 
	
	*Pre-merger price 
	*Use cost17 from event year -1 when available, otherwise from -2, then -3.
	gen precost = . 
	foreach i in 1 2 3 {
	gen t = cost17 if pre`i'_1`is'==1 
	bysort num: gegen t2 = max(t)	
	replace precost = t2 if missing(precost)
	drop t t2
	}
	
	*FP/NP acquiring system 
	*np_sysid contains indicators of whether systems are majority non-profit or
	*for-profit.
	merge 1:1 num ye using "$aha\np_sysid", keepusing(sys_own sysid)
	drop if _m==2 
	drop _m
	*Diagnostics: distinct systems overall and distinct system-hospital pairs
	*among acquired treated rows of the current run, by ownership code 2 and 3
	*(the codes that define acq_np and acq_fp below).
	gdistinct sysid 
	gdistinct sysid num if bought_1`is' & incl_1`is' & sys_own==2
	gdistinct sysid num if bought_1`is' & incl_1`is' & sys_own==3
	gen acq_np = sys_o==2 & bought_1`is'==1
	gen acq_fp = sys_o==3 & bought_1`is'==1
	drop sysid sys_own 
	
	*Fix heterogeneity variables at hospital level
	*Each variable is replaced by its maximum (mean for precost) over the rows
	*sharing hospital and treated status, and set to missing for non-treated rows.
	local het *dhhi* hasin* *tar_bed_`is' pre_ct* acq_* precost a_min 
	unab het: `het'
	foreach var in `het' {
	ren `var' `var'2	
	if "`var'"=="precost" {
	bysort num incl_1`is': gegen `var' = mean(`var'2)
	}
	else {
	bysort num incl_1`is': gegen `var' = max(`var'2)
	}
	replace `var' = . if incl_1`is'!=1	
	drop `var'2
	}
	
	*Flag if above median 
	*hi_ + name is 1 when the value is at or above the median among treated rows
	*and missing when the value itself is missing.
	local cont_het tar_bed pre_ct_tar acq_ct precost a_mindist 
	unab cont_het: `cont_het'
	foreach var in `cont_het' {
	replace `var'    = . if !incl_1`is'
	sum     `var', d
	gen hi_`var'     = (`var' >= r(p50))
	replace hi_`var' = . if missing(`var')
	}
	*Controls get 0 for every heterogeneity variable and above-median flag.
	local het `het' hi_* 
	unab het: `het' 
	foreach var in `het' {
	qui replace `var' = 0 if never==1
	}
	}
	
	*SAVE FILES 
	*Writes three files: one row per treated hospital with the heterogeneity
	*variables, the full regression sample, and a reduced extract.
	{
	*Heterogeneity vars
	*Collapse treated rows to one row per hospital by taking the maximum of every
	*variable, then report any hospital that still has more than one row.
	preserve	
	keep num ye `het' incl_1`is'
	keep if incl_1`is'==1
	drop ye 
	ds num, not 
	foreach var in `r(varlist)' {
	ren `var' `var'2
	bysort num: gegen `var' = max(`var'2)
	drop `var'2
	}
	gduplicates drop 
	gduplicates report num 
	sum 
	*Record which sample the file belongs to.
	gen is = "`is'"
	save "$reg\hets_FULL_`is'", replace
	restore
	
	*Regression data 
	*never_next_3yr is built earlier only in the is run; in the ss run it is
	*created here as a constant 1. capture ignores the already-defined error in
	*the is run.
	capture gen never_next_3yr = 1 
	save "$reg\FULLsample`is'", replace
	
	*Non-sensitive data (transfer to Medicare server)
	*NOTE(review): wt_cemm_s is not created in this section; presumably it comes
	*from the matching weights file - confirm it exists before this keep.
	keep num year never bought_1`is' incl_1`is' bdtot teach rural mcare mcaid white college unempl poverty elderly state_exp_status wt_cemm_s hhi
	save "$op\FULLsample_for_atul`is'", replace 
	}